Load data
Notes regarding the data tables:
MS01_raw.csv - this contains aggregate data for each facility
MS02_raw.csv - this is data from the enrolment form and contains individual-level data for every mother who was either a COVID-19 suspect, suffered a near-mis or died
MS03_raw.csv - daily records for patients from MS02_raw.csv; this has more than 1 records for mothers that were COVID-19 suspects or had near-misses; note that mothers that died later on have their death recorded in MS03_raw.csv and not MS02_raw.csv
MS04_label.csv - contains near-miss review data (all near-misses from MS02_raw.csv and MS03_raw.csv); not all facilities report these: only QECH, Thyolo and Chikwawa report these (though some other facilities also report some of these); in practice not all near-misses in QECH, Thyolo and Chikwawa are reviewed; if patient died before or during review, the review was not done; there is some delay in these data being pushed through to the data portal
MS05_raw.csv - MDSR (maternal death surveillance review); these reviews are done by experts; this should be done for all deaths from MS02_raw.csv and MS03_raw.csv but in practice not all deaths are reviewed; cause of death is reported as free text
dataDate<-"20220317"
ms01<-read.csv(paste(sep="","../data/",dataDate,"/MS01_raw.csv")) # aggregate, facility-level data
ms02<-read.csv(paste(sep="","../data/",dataDate,"/MS02_raw.csv")) # enrolment, individual-level data
ms03<-read.csv(paste(sep="","../data/",dataDate,"/MS03_raw.csv")) # daily records for patients from MS02; more than record for mothers that were COVID suspects or near-misses; mothers that died later on have death recorded in MS03 not MS02
ms04<-read.csv(paste(sep="","../data/",dataDate,"/MS04_label.csv")) # near-miss reviews (all near-misses from MS02 and MS03); not all facilities have these; mostly limited to QECH, Thyolo and Chikwawa; the idea is that all near-misses are reviewed but in practice this is incomplete; completion rates are meant to increase as study goes on
ms05<-read.csv(paste(sep="","../data/",dataDate,"/MS05_raw.csv")) # maternal death surveillance review (by experts); death audits for all deaths from MS02 and MS03 (in practice data not complete; e.g. as of May 2021 only 222 out of 449 deaths reviewed)
epiCurve<-read.csv(file="../data/20210830_EpiCurve/EPI_CURVE_Malawi Monitoring Cases_20200209 - Including 2 wave.csv")
epiCurve$Date<-dmy(epiCurve$Date)
epiCurveJHP<-read.csv("../data/20220318_EpiCurve/time_series_covid19_confirmed_global.csv") # downloaded from JHU CSSE here: https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series
epiCurveJHP.deaths<-read.csv("../data/20220318_EpiCurve/time_series_covid19_deaths_global.csv") # downloaded from JHU CSSE here: https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series
epiCurveJHP<-epiCurveJHP %>%
filter(Country.Region=="Malawi") %>%
dplyr::select(!c(Province.State,Country.Region,Lat,Long))
epiCurveJHP<-data.frame(
Date=mdy(gsub(pattern="^X",replacement="",colnames(epiCurveJHP))),
totalConfirmedCumul=as.numeric(epiCurveJHP[1,])
) %>%
mutate(
totalConfirmed=c(totalConfirmedCumul[1],totalConfirmedCumul[-1]-totalConfirmedCumul[-length(totalConfirmedCumul)])
)
epiCurveJHP.deaths<-epiCurveJHP.deaths %>%
filter(Country.Region=="Malawi") %>%
dplyr::select(!c(Province.State,Country.Region,Lat,Long))
epiCurveJHP.deaths<-data.frame(
Date=mdy(gsub(pattern="^X",replacement="",colnames(epiCurveJHP.deaths))),
totalConfirmedCumul.deaths=as.numeric(epiCurveJHP.deaths[1,])
) %>%
mutate(
totalConfirmed.deaths=c(totalConfirmedCumul.deaths[1],totalConfirmedCumul.deaths[-1]-totalConfirmedCumul.deaths[-length(totalConfirmedCumul.deaths)])
)
epiCurveJHP<-epiCurveJHP %>%
mutate(
totalConfirmedCumul.deaths=epiCurveJHP.deaths$totalConfirmedCumul.deaths[match(Date,epiCurveJHP.deaths$Date)],
totalConfirmed.deaths=epiCurveJHP.deaths$totalConfirmed.deaths[match(Date,epiCurveJHP.deaths$Date)]
) %>%
mutate(
ma14_Cases=ma(totalConfirmed,order=14,centre=TRUE),
ma14_Deaths=ma(totalConfirmed.deaths,order=14,centre=TRUE)
)
rm(epiCurveJHP.deaths)
epiCurveJHP.long<-epiCurveJHP %>%
dplyr::select(Date,totalConfirmed,totalConfirmed.deaths) %>%
pivot_longer(cols=c(totalConfirmed,totalConfirmed.deaths),names_to="type",values_to="count") %>%
mutate(
type=case_when(
type=="totalConfirmed"~"Cases",
type=="totalConfirmed.deaths"~"Deaths",
TRUE~NA_character_
)
)
epiCurveJHP.longMA<-epiCurveJHP %>%
dplyr::select(Date,ma14_Cases,ma14_Deaths) %>%
pivot_longer(cols=c(ma14_Cases,ma14_Deaths),names_to="type",values_to="MA14") %>%
mutate(
type=case_when(
type=="ma14_Cases"~"Cases",
type=="ma14_Deaths"~"Deaths",
TRUE~NA_character_
)
)
epiCurveJHP.long<-epiCurveJHP.long %>%
mutate(
MA14=epiCurveJHP.longMA$MA14[match(paste(sep="_",Date,type),paste(sep="_",epiCurveJHP.longMA$Date,epiCurveJHP.longMA$type))]
)
rm(epiCurveJHP.longMA)
ms01$data_date<-ymd(ms01$data_date)
ms02$data_date<-dmy(ms02$data_date)
ms03$data_date<-dmy(ms03$data_date)
ms04$data_date<-ymd(as.Date(dmy_hms(ms04$data_date)))
ms05$data_date<-ymd(as.Date(dmy_hms(ms05$data_date)))
# filtering out what appear to be misguided missing / N/A value encodings - check with David, Leo, Chikondi and the data managers
for(var in c("birth","cs","md","nd","fsb","msb","anc","pnc","fpc","ccs","bas","conan","pb1","pb2")){
if(sum(ms01[,var]==999,na.rm=TRUE)>0){
print(paste(sep="","Variable ",var," has ",sum(ms01[,var]==999,na.rm=TRUE)," records with a value of 999. This is the code for N/A and these records have had their values set to NA."))
ms01[!is.na(ms01[,var]) & ms01[,var]==999,var]<-NA
}
}
## [1] "Variable anc has 62 records with a value of 999. This is the code for N/A and these records have had their values set to NA."
## [1] "Variable pnc has 130 records with a value of 999. This is the code for N/A and these records have had their values set to NA."
## [1] "Variable fpc has 17 records with a value of 999. This is the code for N/A and these records have had their values set to NA."
## [1] "Variable ccs has 105 records with a value of 999. This is the code for N/A and these records have had their values set to NA."
datSummary<-data.frame(
dataTable=c("MS01","MS02","MS03","MS04","MS05"),
nObs=c(nrow(ms01),nrow(ms02),nrow(ms03),nrow(ms04),nrow(ms05)),
nVars=c(ncol(ms01),ncol(ms02),ncol(ms03),ncol(ms04),ncol(ms05)),
earliestDate=c(min(ms01$data_date,na.rm=T),min(ms02$data_date,na.rm=T),min(ms03$data_date,na.rm=T),min(ms04$data_date,na.rm=T),min(ms05$data_date,na.rm=T)),
latestDate=c(max(ms01$data_date,na.rm=T),max(ms02$data_date,na.rm=T),max(ms03$data_date,na.rm=T),max(ms04$data_date,na.rm=T),max(ms05$data_date,na.rm=T))
)
datSummary %>% kable(caption="Summary of the 5 MATSURV data tables.",col.names=c("Data table","number of observations","number of variables","earliest observation date","most recent observation date"),align = c("r","r","r","c","c")) %>%
kable_styling(full_width = FALSE)
Summary of the 5 MATSURV data tables.
|
Data table
|
number of observations
|
number of variables
|
earliest observation date
|
most recent observation date
|
|
MS01
|
2694
|
61
|
2020-07-27
|
2022-03-15
|
|
MS02
|
2425
|
207
|
2020-04-22
|
2022-03-16
|
|
MS03
|
1818
|
160
|
2017-02-17
|
2022-03-16
|
|
MS04
|
245
|
364
|
2020-06-17
|
2022-03-09
|
|
MS05
|
514
|
350
|
2019-12-28
|
2022-12-24
|
epiMalawi<-epiCurve %>%
filter(!(District %in% c("Mwanza PoE"))) %>%
group_by(Date) %>%
summarise(totalConfirmed=sum(Confirmed),totalConfirmed2=sum(as.integer(Confirmed.total),na.rm=T),.groups="drop")
gEpi<-epiCurveJHP.long %>%
dplyr::filter(Date>=ymd(datePlotStart) & Date<=ymd(datePlotEnd)) %>%
ggplot() +
geom_bar(mapping=aes(x=Date,y=count),stat="identity") +
geom_line(mapping=aes(x=Date,y=MA14),col="steelblue",lwd=1) +
geom_vline(xintercept = ymd("2021-01-01"),lty=2,lwd=1,col="orange") +
geom_vline(xintercept = ymd("2021-06-20"),lty=2,lwd=1,col="orange") +
geom_vline(xintercept = ymd("2021-12-10"),lty=2,lwd=1,col="orange") +
labs(title="Daily confirmed COVID-19 cases (top) and deaths (bottom) in Malawi. Data from JHU CSSE.",caption="Grey bars show daily counts, blue lines show centered 14-day moving averages.\nShown in orange are the proposed time points for the interruptions in the segmented time series analysis: 2021-01-01, 2021-06-20 and 2021-12-10.\n2021-01-01 is a round date and which has the advantage of not conflating anything with the Christmas season.\nFurther, given this date is slightly after the start of the second wave, it allows for a lagged response.\nSimilarly 2021-06-20 and 2021-12-10 were chosen for the third and fourth waves.") +
xlab("Date") +
ylab("Total confirmed daily count.") +
xlim(c(ymd(datePlotStart),ymd(datePlotEnd))) +
facet_wrap(~type,nrow=2,scales = "free_y")
pdf(width=20,height=20,file=paste(sep="",outDir,"EpiCurveByDistrict_WholeOfMalawi_",dateStr,".pdf"))
print(gEpi)
dev.off()
## quartz_off_screen
## 2
#print(gEpi)
Note that for this analysis, we will restrict ourselves to the window from 2020-09-06 to 2021-10-31.
Weekly data reporting - need to aggregate?
Question:
ms01_raw.csv - what is being reported here: daily or weekly births? Looks like there was quite a bit of daily data, then became weekly? Weird peak in October when summarised at weekly level…
gWeekly<-ms01 %>%
mutate(date=floor_date(unit="weeks",ymd(data_date))) %>%
group_by(date) %>%
summarise(births = sum(birth,na.rm=T),.groups="drop") %>%
ggplot(mapping=aes(ymd(date),y=births)) +
geom_bar(stat="identity") +
xlab("date") +
xlim(c(ymd(datePlotStart),ymd(datePlotEnd)))
gDaily<-ms01 %>%
group_by(data_date) %>%
summarise(births = sum(birth,na.rm=T),.groups="drop") %>%
ggplot(mapping=aes(ymd(data_date),y=births)) +
geom_bar(stat="identity") +
xlab("date") +
xlim(c(ymd(datePlotStart),ymd(datePlotEnd)))
grid.arrange(gDaily,gWeekly,nrow=2)

gWeekly<-ms01 %>%
mutate(date=floor_date(unit="weeks",ymd(data_date))) %>%
group_by(date) %>%
summarise(mds = sum(md,na.rm=T),.groups="drop") %>%
ggplot(mapping=aes(ymd(date),y=mds)) +
geom_bar(stat="identity") +
ylab("Maternal deaths") +
xlab("date") +
xlim(c(ymd(datePlotStart),ymd(datePlotEnd)))
gDaily<-ms01 %>%
group_by(data_date) %>%
summarise(mds = sum(md,na.rm=T),.groups="drop") %>%
ggplot(mapping=aes(ymd(data_date),y=mds)) +
geom_bar(stat="identity") +
ylab("Maternal deaths") +
xlab("date") +
xlim(c(ymd(datePlotStart),ymd(datePlotEnd)))
grid.arrange(gDaily,gWeekly,nrow=2)

Number of COVID-suspects and near misses
Question:
nearmiss==0 - should this be treated as ‘No’?
table(ms02$covid,useNA="always") %>%
as.data.frame() %>%
mutate(CovidSuspect=case_when(Var1==0~"No",Var1==1~"Yes")) %>%
dplyr::select(CovidSuspect,Freq) %>%
kable(caption="Total number of COVID-19 suspect cases.",col.names=c("COVID-19 suspicion","Frequency"),row.names = FALSE) %>%
kable_styling(full_width = FALSE)
Total number of COVID-19 suspect cases.
|
COVID-19 suspicion
|
Frequency
|
|
No
|
1852
|
|
Yes
|
571
|
|
NA
|
2
|
table(ms02$nearmiss,useNA="always") %>%
as.data.frame() %>%
mutate(NearMiss=case_when(Var1==0~"No",Var1==1~"Yes")) %>%
dplyr::select(NearMiss,Freq) %>%
kable(caption="Total number of near misses.",col.names=c("Near miss","Frequency"),row.names = FALSE) %>%
kable_styling(full_width = FALSE)
Total number of near misses.
|
Near miss
|
Frequency
|
|
No
|
421
|
|
Yes
|
1209
|
|
NA
|
795
|
gCov<-ms02 %>%
group_by(data_date) %>%
summarise(mds = sum(covid,na.rm=T),.groups="drop") %>%
ggplot(mapping=aes(ymd(data_date),y=mds)) +
geom_bar(stat="identity") +
ylab("COVID-19 suspicions") +
xlab("date") +
xlim(c(ymd(datePlotStart),ymd(datePlotEnd)))
gNearMiss<-ms02 %>%
group_by(data_date) %>%
summarise(mds = sum(nearmiss,na.rm=T),.groups="drop") %>%
ggplot(mapping=aes(ymd(data_date),y=mds)) +
geom_bar(stat="identity") +
ylab("Near misses") +
xlab("date") +
xlim(c(ymd(datePlotStart),ymd(datePlotEnd)))
grid.arrange(nrow=3,gCov,gNearMiss,gEpi)

Summary tables and graphs
Descriptive totals
We first summarise the total institutional live births, maternal and neonatal deaths and ante- and post-natal clinic attendences.
sumDatPeriods<-data.frame(
var=c("Live births","Maternal deaths","Neonatal deaths","Antenatal clinics","Postnatal clinics"),
WholeStudy=NA,
Period1=NA,
Period1Perc=NA,
Period2=NA,
Period2Perc=NA,
Period3=NA,
Period3Perc=NA
)
rownames(sumDatPeriods)<-c("Live births","Maternal deaths","Neonatal deaths","Antenatal clinics","Postnatal clinics")
ms01_studyWindow<-ms01 %>%
dplyr::filter(data_date>=ymd(datePlotStart) & data_date<=ymd(datePlotEnd))
sumDatPeriods["Live births","WholeStudy"]<-sum(ms01_studyWindow$birth,na.rm=T)
sumDatPeriods["Live births","Period1"]<-sum(ms01_studyWindow$birth[ms01_studyWindow$data_date<ymd("2021-01-01")],na.rm=T)
sumDatPeriods["Live births","Period2"]<-sum(ms01_studyWindow$birth[ms01_studyWindow$data_date>=ymd("2021-01-01") & ms01_studyWindow$data_date<ymd("2021-06-20")],na.rm=T)
sumDatPeriods["Live births","Period3"]<-sum(ms01_studyWindow$birth[ms01_studyWindow$data_date>=ymd("2021-06-20")],na.rm=T)
sumDatPeriods["Maternal deaths","WholeStudy"]<-sum(ms01_studyWindow$md,na.rm=T)
sumDatPeriods["Maternal deaths","Period1"]<-sum(ms01_studyWindow$md[ms01_studyWindow$data_date<ymd("2021-01-01")],na.rm=T)
sumDatPeriods["Maternal deaths","Period2"]<-sum(ms01_studyWindow$md[ms01_studyWindow$data_date>=ymd("2021-01-01") & ms01_studyWindow$data_date<ymd("2021-06-20")],na.rm=T)
sumDatPeriods["Maternal deaths","Period3"]<-sum(ms01_studyWindow$md[ms01_studyWindow$data_date>=ymd("2021-06-20")],na.rm=T)
sumDatPeriods["Neonatal deaths","WholeStudy"]<-sum(ms01_studyWindow$nd,na.rm=T)
sumDatPeriods["Neonatal deaths","Period1"]<-sum(ms01_studyWindow$nd[ms01_studyWindow$data_date<ymd("2021-01-01")],na.rm=T)
sumDatPeriods["Neonatal deaths","Period2"]<-sum(ms01_studyWindow$nd[ms01_studyWindow$data_date>=ymd("2021-01-01") & ms01_studyWindow$data_date<ymd("2021-06-20")],na.rm=T)
sumDatPeriods["Neonatal deaths","Period3"]<-sum(ms01_studyWindow$nd[ms01_studyWindow$data_date>=ymd("2021-06-20")],na.rm=T)
sumDatPeriods["Antenatal clinics","WholeStudy"]<-sum(ms01_studyWindow$anc,na.rm=T)
sumDatPeriods["Antenatal clinics","Period1"]<-sum(ms01_studyWindow$anc[ms01_studyWindow$data_date<ymd("2021-01-01")],na.rm=T)
sumDatPeriods["Antenatal clinics","Period2"]<-sum(ms01_studyWindow$anc[ms01_studyWindow$data_date>=ymd("2021-01-01") & ms01_studyWindow$data_date<ymd("2021-06-20")],na.rm=T)
sumDatPeriods["Antenatal clinics","Period3"]<-sum(ms01_studyWindow$anc[ms01_studyWindow$data_date>=ymd("2021-06-20")],na.rm=T)
sumDatPeriods["Postnatal clinics","WholeStudy"]<-sum(ms01_studyWindow$pnc,na.rm=T)
sumDatPeriods["Postnatal clinics","Period1"]<-sum(ms01_studyWindow$pnc[ms01_studyWindow$data_date<ymd("2021-01-01")],na.rm=T)
sumDatPeriods["Postnatal clinics","Period2"]<-sum(ms01_studyWindow$pnc[ms01_studyWindow$data_date>=ymd("2021-01-01") & ms01_studyWindow$data_date<ymd("2021-06-20")],na.rm=T)
sumDatPeriods["Postnatal clinics","Period3"]<-sum(ms01_studyWindow$pnc[ms01_studyWindow$data_date>=ymd("2021-06-20")],na.rm=T)
sumDatPeriods$Period1Perc=round(digits=1,100*sumDatPeriods$Period1/sumDatPeriods$WholeStudy)
sumDatPeriods$Period2Perc=round(digits=1,100*sumDatPeriods$Period2/sumDatPeriods$WholeStudy)
sumDatPeriods$Period3Perc=round(digits=1,100*sumDatPeriods$Period3/sumDatPeriods$WholeStudy)
sumDatPeriods<-format(big.mark=",",scientific=FALSE,sumDatPeriods)
sumDatPeriods$Period1<-paste(sep="",sumDatPeriods$Period1," (",sumDatPeriods$Period1Perc,"%)")
sumDatPeriods$Period2<-paste(sep="",sumDatPeriods$Period2," (",sumDatPeriods$Period2Perc,"%)")
sumDatPeriods$Period3<-paste(sep="",sumDatPeriods$Period3," (",sumDatPeriods$Period3Perc,"%)")
sumDatPeriods %>%
dplyr::select(!c(var,contains("Perc"))) %>%
kable(row.names=TRUE,col.names=c("Whole study period",paste(sep=" - ",datePlotStart,"2020-12-31"),paste(sep=" - ","2021-01-01","2021-06-19"),paste(sep=" - ","2021-06-20",datePlotEnd))) %>%
kable_styling(full_width=FALSE)
|
|
Whole study period
|
2020-09-06 - 2020-12-31
|
2021-01-01 - 2021-06-19
|
2021-06-20 - 2021-10-31
|
|
Live births
|
225,990
|
67,310 (29.8%)
|
88,685 (39.2%)
|
69,995 (31.0%)
|
|
Maternal deaths
|
609
|
176 (28.9%)
|
228 (37.4%)
|
205 (33.7%)
|
|
Neonatal deaths
|
6,699
|
1,916 (28.6%)
|
2,695 (40.2%)
|
2,088 (31.2%)
|
|
Antenatal clinics
|
280,208
|
72,138 (25.7%)
|
117,629 (42.0%)
|
90,441 (32.3%)
|
|
Postnatal clinics
|
108,290
|
37,228 (34.4%)
|
41,501 (38.3%)
|
29,561 (27.3%)
|
Patient characteristics summary table
(TBD)
Time plots
Note: breech births do not seem to be tracked? Variable bb is missing from data frame.
varNames <- c(
`births` = "Live births",
`css` = "Cesarean sections",
`ibs` = "Instrument deliveries",
`mds` = "Maternal deaths",
`nds` = "Neonatal deaths",
`sbs` = "Still births",
`fsbs` = "Still births (fresh)",
`msbs` = "Still births (macerated)",
`ancs` = "Antenatal clinic visits",
`pncs` = "Postnatal clinic visits",
`fpcs` = "Family planning clinic visits",
`ccss` = "Cervical cancer screenings",
`bass` = "Birth asphyxias",
`conans` = "Conenital abnormalities",
`pb1s` = "Birth weight < 1,000g",
`pb2s` = "Birth weight >= 1,000g, < 1,500g",
`ohmds` = "Out-of-hospital maternal deaths",
`ohnds` = "Out-of-hospital neonatal deaths",
`ohfsbs` = "Out-of-hospital still births (fresh)",
`ohmsbs` = "Out-of-hospital still births (macerated)",
`numdocs` = "Number of doctors",
`numcos` = "Number of clinical officers",
`numnurs` = "Number of nurses"
)
ms01Sum<-ms01 %>%
mutate(date=floor_date(unit="weeks",ymd(data_date))) %>%
group_by(date) %>%
summarise(
births = sum(birth,na.rm=T), # live births
css = sum(cs,na.rm=T), # Cesarean sections
ibs = sum(ib,na.rm=T), # instrument deliveries
mds = sum(md,na.rm=T), # maternal deaths
nds = sum(nd,na.rm=T), # neonatal deaths
sbs = sum(fsb+msb,na.rm=T), # all still births
fsbs = sum(fsb,na.rm=T), # fresh still births
msbs = sum(msb,na.rm=T), # macerated still births
ancs = sum(anc,na.rm=T), # antenatal clinic visits
pncs = sum(pnc,na.rm=T), # postnatal clinic visits
fpcs = sum(fpc,na.rm=T), # family planning clinic visits
ccss = sum(ccs,na.rm=T), # cervical cancer screenings
bass = sum(bas,na.rm=T), # birth asphyxias
conans = sum(conan,na.rm=T), # congenital abnormalities
pb1s = sum(pb1,na.rm=T), # live births < 1,000g
pb2s = sum(pb2,na.rm=T), # live births >=1,000g, < 1,500g
ohmds = sum(ohmd,na.rm=T), # out-of-hospital maternal deaths
ohnds = sum(ohnd,na.rm=T), # out-of-hospital neonatal deaths
ohfsbs = sum(ohfsb,na.rm=T), # out-of-hospital fresh still births
ohmsbs = sum(ohmsb,na.rm=T), # out-of-hospital macerated still births
numdocs = sum(numdoc,na.rm=T), # number of doctors
numcos = sum(numco,na.rm=T), # number of clinical officers
numnurs = sum(numnur,na.rm=T), # number of nurses
.groups="drop") %>%
pivot_longer(cols=c(births,css,ibs,mds,nds,sbs,fsbs,msbs,ancs,pncs,fpcs,ccss,bass,conans,pb1s,pb2s,ohmds,ohnds,ohfsbs,ohmsbs,numdocs,numcos,numnurs),names_to="metric",values_to="weeklyCounts") %>%
mutate(metric=fct_relevel(metric,c("births","css","ibs","mds","nds","sbs","fsbs","msbs","ancs","pncs","fpcs","ccss","bass","conans","pb1s","pb2s","ohmds","ohnds","ohfsbs","ohmsbs","numdocs","numcos","numnurs")))
Births
ms01Sum %>%
filter(metric %in% c("births","css","ibs")) %>%
ggplot(mapping=aes(ymd(date),y=weeklyCounts)) +
geom_bar(stat="identity") +
ylab("frequency") +
xlab("date") +
xlim(c(ymd(datePlotStart),ymd(datePlotEnd))) +
labs(title="Weekly reported births.",caption="Data were aggregated by calendar week, with the start date of each week identifying the week.\nOrange dashed lines indicate the chosen interruption dates.") +
geom_vline(xintercept = ymd("2021-01-01"),lty=2,lwd=1,col="orange") +
geom_vline(xintercept = ymd("2021-06-20"),lty=2,lwd=1,col="orange") +
#geom_vline(xintercept = ymd("2021-12-10"),lty=2,lwd=1,col="orange") +
facet_wrap(~metric,nrow=3,scales="free_y",labeller = as_labeller(varNames))

Deaths
ms01Sum %>%
filter(metric %in% c("mds","nds","fsbs","msbs")) %>%
ggplot(mapping=aes(ymd(date),y=weeklyCounts)) +
geom_bar(stat="identity") +
ylab("frequency") +
xlab("date") +
xlim(c(ymd(datePlotStart),ymd(datePlotEnd))) +
labs(title="Weekly reported deaths.",caption="Data were aggregated by calendar week, with the start date of each week identifying the week.\nOrange dashed lines indicate the chosen interruption dates.") +
geom_vline(xintercept = ymd("2021-01-01"),lty=2,lwd=1,col="orange") +
geom_vline(xintercept = ymd("2021-06-20"),lty=2,lwd=1,col="orange") +
#geom_vline(xintercept = ymd("2021-12-10"),lty=2,lwd=1,col="orange") +
facet_wrap(~metric,nrow=4,scales="free_y",labeller = as_labeller(varNames))

Issues during birth
ms01Sum %>%
filter(metric %in% c("bass","conans","pb1s","pb2s")) %>%
ggplot(mapping=aes(ymd(date),y=weeklyCounts)) +
geom_bar(stat="identity") +
ylab("frequency") +
xlab("date") +
xlim(c(ymd(datePlotStart),ymd(datePlotEnd))) +
labs(title="Weekly reported birth issues.",caption="Data were aggregated by calendar week, with the start date of each week identifying the week.\nOrange dashed lines indicate the chosen interruption dates.") +
geom_vline(xintercept = ymd("2021-01-01"),lty=2,lwd=1,col="orange") +
geom_vline(xintercept = ymd("2021-06-20"),lty=2,lwd=1,col="orange") +
geom_vline(xintercept = ymd("2021-12-10"),lty=2,lwd=1,col="orange") +
facet_wrap(~metric,nrow=4,scales="free_y",labeller = as_labeller(varNames))

Clinics
ms01Sum %>%
filter(metric %in% c("ancs","pncs","fpcs","ccss")) %>%
ggplot(mapping=aes(ymd(date),y=weeklyCounts)) +
geom_bar(stat="identity") +
ylab("frequency") +
xlab("date") +
xlim(c(ymd(datePlotStart),ymd(datePlotEnd))) +
labs(title="Weekly reported clinic visits.",caption="Data were aggregated by calendar week, with the start date of each week identifying the week.\nOrange dashed lines indicate the chosen interruption dates.") +
geom_vline(xintercept = ymd("2021-01-01"),lty=2,lwd=1,col="orange") +
geom_vline(xintercept = ymd("2021-06-20"),lty=2,lwd=1,col="orange") +
#geom_vline(xintercept = ymd("2021-12-10"),lty=2,lwd=1,col="orange") +
facet_wrap(~metric,nrow=4,scales="free_y",labeller = as_labeller(varNames))

Out-of-hospital deaths
ms01Sum %>%
filter(metric %in% c("ohmds","ohnds","ohfsbs","ohmsbs")) %>%
ggplot(mapping=aes(ymd(date),y=weeklyCounts)) +
geom_bar(stat="identity") +
ylab("frequency") +
xlab("date") +
xlim(c(ymd(datePlotStart),ymd(datePlotEnd))) +
labs(title="Weekly reported deaths.",caption="Data were aggregated by calendar week, with the start date of each week identifying the week.\nOrange dashed lines indicate the chosen interruption dates.") +
geom_vline(xintercept = ymd("2021-01-01"),lty=2,lwd=1,col="orange") +
geom_vline(xintercept = ymd("2021-06-20"),lty=2,lwd=1,col="orange") +
geom_vline(xintercept = ymd("2021-12-10"),lty=2,lwd=1,col="orange") +
facet_wrap(~metric,nrow=4,scales="free_y",labeller = as_labeller(varNames))

Segmented regression analysis
ms01_weekly<-ms01 %>%
mutate(date=floor_date(unit="weeks",ymd(data_date))) %>%
group_by(date) %>%
summarise(
births = sum(birth,na.rm=T), # live births
css = sum(cs,na.rm=T), # Cesarean sections
ibs = sum(ib,na.rm=T), # instrument deliveries
mds = sum(md,na.rm=T), # maternal deaths
nds = sum(nd,na.rm=T), # neonatal deaths
sbs = sum(fsb+msb,na.rm=T), # all still births
fsbs = sum(fsb,na.rm=T), # fresh still births
msbs = sum(msb,na.rm=T), # macerated still births
ancs = sum(anc,na.rm=T), # antenatal clinic visits
pncs = sum(pnc,na.rm=T), # postnatal clinic visits
fpcs = sum(fpc,na.rm=T), # family planning clinic visits
ccss = sum(ccs,na.rm=T), # cervical cancer screenings
bass = sum(bas,na.rm=T), # birth asphyxias
conans = sum(conan,na.rm=T), # congenital abnormalities
pb1s = sum(pb1,na.rm=T), # live births < 1,000g
pb2s = sum(pb2,na.rm=T), # live births >=1,000g, < 1,500g
ohmds = sum(ohmd,na.rm=T), # out-of-hospital maternal deaths
ohnds = sum(ohnd,na.rm=T), # out-of-hospital neonatal deaths
ohfsbs = sum(ohfsb,na.rm=T), # out-of-hospital fresh still births
ohmsbs = sum(ohmsb,na.rm=T), # out-of-hospital macerated still births
numdocs = sum(numdoc,na.rm=T), # number of doctors
numcos = sum(numco,na.rm=T), # number of clinical officers
numnurs = sum(numnur,na.rm=T), # number of nurses
.groups="drop") %>%
dplyr::filter(date>=datePlotStart & date<=datePlotEnd)
ms01_weekly<-ms01_weekly[order(ms01_weekly$date),] %>%
mutate(
week_num=weekDict$week_num[match(date,weekDict$week)],
covid=ifelse(date<"2021-01-01",0,1),
covid2=ifelse(date<"2021-06-20",0,1),
covid3=ifelse(date<"2021-12-10",0,1)
)
covid_week<-min(ms01_weekly$date[ms01_weekly$covid==1])
covid2_week<-min(ms01_weekly$date[ms01_weekly$covid2==1])
covid3_week<-min(ms01_weekly$date[ms01_weekly$covid3==1])
covid_week_num<-min(ms01_weekly$week_num[ms01_weekly$covid==1])
covid2_week_num<-min(ms01_weekly$week_num[ms01_weekly$covid2==1])
covid3_week_num<-min(ms01_weekly$week_num[ms01_weekly$covid3==1])
ms01_weekly<-ms01_weekly %>%
mutate(
week_num_postcovid=ifelse(covid==0,0,week_num-covid_week_num+1),
week_num_postcovid2=ifelse(covid2==0,0,week_num-covid2_week_num+1),
week_num_postcovid3=ifelse(covid3==0,0,week_num-covid3_week_num+1)
)
## REMOVE THIS LINE ONCE DATA FOR 2021-12-05 is fixed!!!
#ms01_weekly<-ms01_weekly[-nrow(ms01_weekly),]
modFunBirthOffset<-function(dat,predRange=c("2020-07-26 00:00","2021-09-30 23:59"),var,varNameAnnot,ylabAnnot,B=1e3,doBS=TRUE,covidDate="2021-01-01"){
dat<-dat %>%
mutate(sr=get(var))
modIntercept <-try(silent=TRUE,glm.nb(as.formula(paste(sep="",var," ~ 1 + offset(log(births))")), data=dat))
modCF <-try(silent=TRUE,glm.nb(as.formula(paste(sep="",var," ~ week_num + offset(log(births))")), data=dat %>% dplyr::filter(week_num<covid_week_num)))
# modSR <-glm.nb(as.formula(paste(sep="",var," ~ covid + week_num + week_num_postcovid + offset(log(births))")), data=dat)
# modSR.season <-glm.nb(as.formula(paste(sep="",var," ~ covid + week_num + week_num_postcovid + harmonic(week_num,2,52) + offset(log(births))")), data=dat)
modSRcov2 <-try(silent=TRUE,glm.nb(as.formula(paste(sep="",var," ~ covid + covid2 + week_num + week_num_postcovid + week_num_postcovid2 + offset(log(births))")), data=dat))
# modSRcov3 <-glm.nb(as.formula(paste(sep="",var," ~ covid + covid2 + covid3 + week_num + week_num_postcovid + week_num_postcovid2 + week_num_postcovid3 + offset(log(births))")), data=dat)
if(class(modCF)!="try-error" & class(modSRcov2)!="try-error"){
pseudoR2<-1-logLik(modSRcov2)/logLik(modIntercept)
if(doBS){
#tmp<-coef(modSRcov3)
tmp<-coef(modSRcov2)
covidWaves12Effect<-tmp["covid"]-tmp["covid2"]
covidWaves12EffectBS<-numeric(0)
modCFBS<-list()
for(b in 1:B){
modCFBS[[b]]<-try(stop("error"),silent=TRUE)
modBS<-try(stop("error"),silent=TRUE)
while(class(modCFBS[[b]])=="try-error" | class(modBS)=="try-error"){
datBS<-dat[sample(x=1:nrow(dat),size=nrow(dat),replace=TRUE),]
modCFBS[[b]]<-try(silent=TRUE,glm.nb(as.formula(paste(sep="",var," ~ week_num + offset(log(births))")), data=datBS %>% dplyr::filter(week_num<covid_week_num)))
#modBS <-glm.nb(as.formula(paste(sep="",var," ~ covid + covid2 + covid3 + week_num + week_num_postcovid + week_num_postcovid2 + week_num_postcovid3 + offset(log(births))")), data=datBS)
modBS <-try(silent=TRUE,glm.nb(as.formula(paste(sep="",var," ~ covid + covid2 + week_num + week_num_postcovid + week_num_postcovid2 + offset(log(births))")), data=datBS))
}
tmp<-coef(modBS)
covidWaves12EffectBS<-c(covidWaves12EffectBS,tmp["covid"]-tmp["covid2"])
}
covidWaves12EffectCI<-quantile(covidWaves12EffectBS,probs=c(0.025,0.975))
}
predDfCov2<-data.frame(
date=seq(ymd_hm(predRange[1]),ymd_hm(predRange[2]),by="hour"),
births=3500
) %>%
mutate(week_num=(1:n() +7*24-1)/(7*24)) %>%
mutate(
covid=ifelse(week_num<covid_week_num,0,1),
covid2=ifelse(week_num<covid2_week_num,0,1)
) %>%
mutate(
week_num_postcovid=ifelse(covid==0,0,week_num-covid_week_num),
week_num_postcovid2=ifelse(covid2==0,0,week_num-covid2_week_num)
) %>%
as_tibble()
predDfCov2<-predDfCov2 %>%
mutate(
srPredLink=NA,
srPredLinkSE=NA
)
predTmp<-try(silent=TRUE,predict(modSRcov2,newdata=predDfCov2,type="link",se.fit=T))
if(class(predTmp)!="try-error"){
predDfCov2$srPredLink<-predTmp$fit
predDfCov2$srPredLinkSE<-predTmp$se.fit
}
predDfCov2<-predDfCov2 %>%
mutate(
srPred=exp(srPredLink),
srPredLow=exp(srPredLink-qnorm(0.975)*srPredLinkSE),
srPredUpp=exp(srPredLink+qnorm(0.975)*srPredLinkSE)
)
predDfCF<-data.frame(
date=seq(ymd_hm(predRange[1]),ymd_hm(predRange[2]),by="hour"),
births=3500
) %>%
mutate(
week_num=(1:n() +7*24-1)/(7*24)
) %>%
as_tibble() %>%
filter(week_num>=covid_week_num)
predDfCF<-predDfCF %>%
mutate(
srPredLink=NA,
srPredLinkSE=NA,
)
predTmp<-try(silent=TRUE,predict(modCF,newdata=predDfCF,type="link",se.fit=T))
if(class(predTmp)!="try-error"){
predDfCF$srPredLink<-predTmp$fit
predDfCF$srPredLinkSE<-predTmp$se.fit
}
predDfCF<-predDfCF %>%
mutate(
srPred=exp(srPredLink),
srPredLow=exp(srPredLink-qnorm(0.975)*srPredLinkSE),
srPredUpp=exp(srPredLink+qnorm(0.975)*srPredLinkSE)
)
predDfCFWeekly<-data.frame(
date=seq(ymd_hm(predRange[1]),ymd_hm(predRange[2]),by="weeks")
) %>%
mutate(week_num=1:n()) %>%
mutate(
births=dat$births[match(week_num,dat$week_num)]
) %>%
as_tibble() %>%
filter(week_num>=covid_week_num)
predDfCFWeekly<-predDfCFWeekly %>%
mutate(
srPredLink=NA,
srPredLinkSE=NA
)
predTmp<-try(silent=TRUE,predict(modCF,newdata=predDfCFWeekly,type="link",se.fit=T))
if(class(predTmp)!="try-error"){
predDfCFWeekly$srPredLink<-predTmp$fit
predDfCFWeekly$srPredLinkSE<-predTmp$se.fit
}
predDfCFWeekly<-predDfCFWeekly %>%
mutate(
srPred=exp(srPredLink)
)
obsOutcome<-sum(dat[match(predDfCFWeekly$week_num,dat$week_num),var],na.rm=T)
predOutcome<-sum(predDfCFWeekly$srPred[!is.na(dat[match(predDfCFWeekly$week_num,dat$week_num),var])])
predOutcomeLow<-sum(exp(predDfCFWeekly$srPredLink-qnorm(0.975)*predDfCFWeekly$srPredLinkSE)[!is.na(dat[match(predDfCFWeekly$week_num,dat$week_num),var])])
predOutcomeUpp<-sum(exp(predDfCFWeekly$srPredLink+qnorm(0.975)*predDfCFWeekly$srPredLinkSE)[!is.na(dat[match(predDfCFWeekly$week_num,dat$week_num),var])])
diffOutcome<-predOutcome-obsOutcome
diffOutcomeLow<-predOutcomeLow-obsOutcome
diffOutcomeUpp<-predOutcomeUpp-obsOutcome
# if(doBS){
# predOutcomeBS<-rep(0,B)
# diffOutcomeBS<-rep(0,B)
#
# for(b in 1:B){
# tmpPred<-predDfCFWeekly
# u<-rnorm(n=1,mean=0,sd=1) # parametric bootstrapping; we sample a curve rather than individual points
# tmpPred$srPred<-exp(predDfCFWeekly$srPredLink+u*predDfCFWeekly$srPredLinkSE)
#
# # predTmp<-try(silent=TRUE,exp(predict(modCFBS[[b]],newdata=tmpPred,type="link",se.fit=T)$fit))
# # if(class(predTmp)!="try-error"){tmpPred$srPred<-predTmp}
#
# predOutcomeBS[b]<-sum(tmpPred$srPred[!is.na(dat[match(tmpPred$week_num,dat$week_num),var])])
# diffOutcomeBS[b]<-predOutcomeBS[b]-obsOutcome
# }
# }
tmp<-summary(modSRcov2)$coefficients %>%
as.data.frame() %>%
mutate(
Estimate=exp(Estimate),
p.value=`Pr(>|z|)`,
rownum=1:n()
) %>%
mutate(
Estimate=case_when(
rownum==1~1000*Estimate,
TRUE~Estimate
)
) %>%
dplyr::select(c("Estimate","Std. Error","z value","p.value","rownum"))
if(doBS){
print(
tmp %>%
dplyr::select(!rownum) %>%
kable(digits = 4,caption=paste(sep="","Summary of segmented regression analysis for ",tolower(varNameAnnot)," with two interruptions for the start of the second and third Covid-19 waves.\nThe estimates are exponentiated model coefficients, i.e. the intercept estimate is the baseline incidence rate (per 1,000 live births) and all other estimates are rate ratios associated with the different variables.\nStandard errors are for coefficient estimates on the link scale not exponentiated coefficients.\nThe difference in effects (on the link scale) of the second and third Covid-19 waves is ",format(nsmall=2,round(digits=2,covidWaves12Effect))," with 95% confidence interval (",paste(collapse=",",format(nsmall=2,round(digits=2,covidWaves12EffectCI))),").\nThe total difference in outcomes between the counterfactual scenario and observed outcomes from ",covidDate," to ",as.Date(predRange[2])," is ",round(digits=0,diffOutcome)," with 95% CI (",round(digits=0,diffOutcomeLow),", ",round(digits=0,diffOutcomeUpp),").")) %>%
kable_styling(full_width=FALSE) %>%
column_spec(5,bold=ifelse(tmp$p.value<0.05 & tmp$rownum>1,TRUE,FALSE))
)
}else{
print(
tmp %>%
dplyr::select(!rownum) %>%
kable(digits = 4,caption=paste(sep="","Summary of segmented regression analysis for ",tolower(varNameAnnot)," with two interruptions for the start of the second and third Covid-19 waves.")) %>%
kable_styling(full_width=FALSE) %>%
column_spec(5,bold=ifelse(tmp$p.value<0.05 & tmp$rownum>1,TRUE,FALSE))
)
}
gCov2<-ggplot() +
geom_rect(aes(xmin=as_datetime(covid_week), xmax=as_datetime(covid2_week), ymin=-Inf, ymax=Inf), alpha=0.1) +
geom_rect(aes(xmin=as_datetime(covid2_week), xmax=max(c(as_datetime(dat$date),ymd_hm(predRange))), ymin=-Inf, ymax=Inf), alpha=0.2) +
geom_line(aes(y=1000*srPred/births, x=ymd_hms(date)), color="orange", data=predDfCF, lty=2) +
geom_ribbon(aes(ymax=1000*srPredUpp/births, ymin=1000*srPredLow/births, x=date), fill="orange", alpha=0.2, data=predDfCF) +
geom_line(aes(y=1000*srPred/births, x=ymd_hms(date)), color="steelblue", data=predDfCov2) +
geom_ribbon(aes(ymax=1000*srPredUpp/births, ymin=1000*srPredLow/births, x=date), fill="steelblue", alpha=0.3, data=predDfCov2) +
geom_point(aes(y=1000*sr/births, x=as_datetime(date)), data=dat, shape=1) +
ylab(ylabAnnot) +
xlab("Date") +
labs(title=paste(sep="",varNameAnnot," - ITS analysis"),
caption=paste(sep="","\n Dots = observed data.\n Line = fitted model (95% CI) with both step and slope changes due to the COVID-19 second and third waves.\n Shaded areas indicate the second (lighter shade of grey) and third (darker shade) COVID-19 waves in Malawi.\nThe solid blue curve shows the model fit for actual data.\nThe dashed orange line shows the counterfactual scenario of no second & third COVID-19 waves.\nModel AIC = ",round(digits=2,modSRcov2$aic),"; pseudo-R2 = ",round(digits=2,pseudoR2),".")) +
coord_cartesian(xlim=c(ymd_hms(predRange[1]), ymd_hms(predRange[2])),ylim=c(0.75*min(c(1000*dat$sr/dat$births,1000*predDfCF$srPred/predDfCF$births,1000*predDfCov2$srPred/predDfCov2$births,1000*predDfCov2$srPredLow/predDfCov2$births)),min(c(4*max(1000*dat$sr/dat$births),max(c(1.15*1000*dat$sr/dat$births,1000*predDfCF$srPred/predDfCF$births,1.15*1000*predDfCov2$srPred/predDfCov2$births,1.15*1000*predDfCov2$srPredUpp/predDfCov2$births)))))) +
scale_x_datetime(expand = c(0,0)) +
theme_bw() +
theme(legend.title = element_blank())
print(gCov2)
res<-list()
res[[paste(sep="",varNameAnnot,"_modSRCov2")]]<-modSRcov2
res[[paste(sep="",varNameAnnot,"_graphCov2")]]<-gCov2
res[["observedOutcome"]]<-obsOutcome
res[["predictedCounterfactualOutcome"]]<-predOutcome
res[["predictedCounterfactualOutcomeLow"]]<-predOutcomeLow
res[["predictedCounterfactualOutcomeUpp"]]<-predOutcomeUpp
res[["differenceCounterFactualObserved"]]<-diffOutcome
res[["differenceCounterFactualObservedLow"]]<-diffOutcomeLow
res[["differenceCounterFactualObservedUpp"]]<-diffOutcomeUpp
res[["data"]]<-dat
res[["predDfCFWeekly"]]<-predDfCFWeekly
# if(doBS){
# res[["predictedCounterfactualOutcome_BS"]]<-predOutcomeBS
# res[["differenceCounterFactualObserved_BS"]]<-diffOutcomeBS
# }
return(res)
}else{
print("Unable to fit models.")
return(NULL)
}
}
modFunNoOffset<-function(dat,predRange=c("2020-07-26 00:00","2021-09-30 23:59"),var,varNameAnnot,ylabAnnot,B=1e3,doBS=TRUE,covidDate="2021-01-01"){
dat<-dat %>%
mutate(sr=get(var))
modIntercept <-try(silent=TRUE,glm.nb(as.formula(paste(sep="",var," ~ 1")), data=dat))
modCF <-try(silent=TRUE,glm.nb(as.formula(paste(sep="",var," ~ week_num")), data=dat %>% dplyr::filter(week_num<covid_week_num)))
# modSR <-glm.nb(as.formula(paste(sep="",var," ~ covid + week_num + week_num_postcovid")), data=dat)
# modSR.season <-glm.nb(as.formula(paste(sep="",var," ~ covid + week_num + week_num_postcovid + harmonic(week_num,2,52)")), data=dat)
modSRcov2 <-try(silent=TRUE,glm.nb(as.formula(paste(sep="",var," ~ covid + covid2 + week_num + week_num_postcovid + week_num_postcovid2")), data=dat))
# modSRcov3 <-glm.nb(as.formula(paste(sep="",var," ~ covid + covid2 + covid3 + week_num + week_num_postcovid + week_num_postcovid2 + week_num_postcovid3")), data=dat)
if(class(modCF)!="try-error" & class(modSRcov2)!="try-error"){
pseudoR2<-1-logLik(modSRcov2)/logLik(modIntercept)
if(doBS){
#tmp<-coef(modSRcov3)
tmp<-coef(modSRcov2)
covidWaves12Effect<-tmp["covid"]-tmp["covid2"]
covidWaves12EffectBS<-numeric(0)
modCFBS<-list()
for(b in 1:B){
modCFBS[[b]]<-try(stop("error"),silent=TRUE)
modBS<-try(stop("error"),silent=TRUE)
while(class(modCFBS[[b]])=="try-error" | class(modBS)=="try-error"){
datBS<-dat[sample(x=1:nrow(dat),size=nrow(dat),replace=TRUE),]
modCFBS[[b]]<-try(silent=TRUE,glm.nb(as.formula(paste(sep="",var," ~ week_num")), data=datBS %>% dplyr::filter(week_num<covid_week_num)))
#modBS <-glm.nb(as.formula(paste(sep="",var," ~ covid + covid2 + covid3 + week_num + week_num_postcovid + week_num_postcovid2 + week_num_postcovid3")), datBS)
modBS <-try(silent=TRUE,glm.nb(as.formula(paste(sep="",var," ~ covid + covid2 + week_num + week_num_postcovid + week_num_postcovid2")), datBS))
}
tmp<-coef(modBS)
covidWaves12EffectBS<-c(covidWaves12EffectBS,tmp["covid"]-tmp["covid2"])
}
covidWaves12EffectCI<-quantile(covidWaves12EffectBS,probs=c(0.025,0.975))
}
predDfCov2<-data.frame(
date=seq(ymd_hm(predRange[1]),ymd_hm(predRange[2]),by="hour")
) %>%
mutate(week_num=(1:n() +7*24-1)/(7*24)) %>%
mutate(
covid=ifelse(week_num<covid_week_num,0,1),
covid2=ifelse(week_num<covid2_week_num,0,1)
) %>%
mutate(
week_num_postcovid=ifelse(covid==0,0,week_num-covid_week_num),
week_num_postcovid2=ifelse(covid2==0,0,week_num-covid2_week_num)
) %>%
as_tibble()
predDfCov2<-predDfCov2 %>%
mutate(
srPredLink=NA,
srPredLinkSE=NA
)
predTmp<-try(silent=TRUE,predict(modSRcov2,newdata=predDfCov2,type="link",se.fit=T))
if(class(predTmp)!="try-error"){
predDfCov2$srPredLink<-predTmp$fit
predDfCov2$srPredLinkSE<-predTmp$se.fit
}
predDfCov2<-predDfCov2 %>%
mutate(
srPred=exp(srPredLink),
srPredLow=exp(srPredLink-qnorm(0.975)*srPredLinkSE),
srPredUpp=exp(srPredLink+qnorm(0.975)*srPredLinkSE)
)
predDfCF<-data.frame(
date=seq(ymd_hm(predRange[1]),ymd_hm(predRange[2]),by="hour")
) %>%
mutate(
week_num=(1:n() +7*24-1)/(7*24)
) %>%
as_tibble() %>%
filter(week_num>=covid_week_num)
predDfCF<-predDfCF %>%
mutate(
srPredLink=NA,
srPredLinkSE=NA
)
predTmp<-try(silent=TRUE,predict(modCF,newdata=predDfCF,type="link",se.fit=T))
if(class(predTmp)!="try-error"){
predDfCF$srPredLink<-predTmp$fit
predDfCF$srPredLinkSE<-predTmp$se.fit
}
predDfCF<-predDfCF %>%
mutate(
srPred=exp(srPredLink),
srPredLow=exp(srPredLink-qnorm(0.975)*srPredLinkSE),
srPredUpp=exp(srPredLink+qnorm(0.975)*srPredLinkSE)
)
predDfCFWeekly<-data.frame(
date=seq(ymd_hm(predRange[1]),ymd_hm(predRange[2]),by="weeks")
) %>%
mutate(week_num=1:n()) %>%
as_tibble() %>%
filter(week_num>=covid_week_num)
predDfCFWeekly<-predDfCFWeekly %>%
mutate(
srPredLink=NA,
srPredLinkSE=NA
)
predTmp<-try(silent=TRUE,predict(modCF,newdata=predDfCFWeekly,type="link",se.fit=T))
if(class(predTmp)!="try-error"){
predDfCFWeekly$srPredLink<-predTmp$fit
predDfCFWeekly$srPredLinkSE<-predTmp$se.fit
}
predDfCFWeekly<-predDfCFWeekly %>%
mutate(
srPred=exp(srPredLink)
)
obsOutcome<-sum(dat[match(predDfCFWeekly$week_num,dat$week_num),var],na.rm=T)
predOutcome<-sum(predDfCFWeekly$srPred[!is.na(dat[match(predDfCFWeekly$week_num,dat$week_num),var])])
predOutcomeLow<-sum(exp(predDfCFWeekly$srPredLink-qnorm(0.975)*predDfCFWeekly$srPredLinkSE)[!is.na(dat[match(predDfCFWeekly$week_num,dat$week_num),var])])
predOutcomeUpp<-sum(exp(predDfCFWeekly$srPredLink+qnorm(0.975)*predDfCFWeekly$srPredLinkSE)[!is.na(dat[match(predDfCFWeekly$week_num,dat$week_num),var])])
diffOutcome<-predOutcome-obsOutcome
diffOutcomeLow<-predOutcomeLow-obsOutcome
diffOutcomeUpp<-predOutcomeUpp-obsOutcome
# if(doBS){
# predOutcomeBS<-rep(0,B)
# diffOutcomeBS<-rep(0,B)
#
# for(b in 1:B){
# tmpPred<-predDfCFWeekly
# u<-rnorm(n=1,mean=0,sd=1) # parametric bootstrapping; we sample a curve rather than individual points
# tmpPred$srPred<-exp(predDfCFWeekly$srPredLink+u*predDfCFWeekly$srPredLinkSE)
# #tmpPred$srPred<-exp(rnorm(n=nrow(predDfCFWeekly),mean=predDfCFWeekly$srPredLink,sd=predDfCFWeekly$srPredLinkSE))# parametric bootstrapping
#
# # predTmp<-try(silent=TRUE,exp(predict(modCFBS[[b]],newdata=tmpPred,type="link",se.fit=T)$fit))
# # if(class(predTmp)!="try-error"){tmpPred$srPred<-predTmp}
#
# predOutcomeBS[b]<-sum(tmpPred$srPred[!is.na(dat[match(tmpPred$week_num,dat$week_num),var])])
# diffOutcomeBS[b]<-predOutcomeBS[b]-obsOutcome
# }
# }
#print(summary(modSRcov2))
tmp<-summary(modSRcov2)$coefficients %>%
as.data.frame() %>%
mutate(
Estimate=exp(Estimate),
p.value=`Pr(>|z|)`,
rownum=1:n()
) %>%
dplyr::select(c("Estimate","Std. Error","z value","p.value","rownum"))
if(doBS){
print(
tmp %>%
dplyr::select(!rownum) %>%
kable(digits = 4,caption=paste(sep="","Summary of segmented regression analysis for ",tolower(varNameAnnot)," with two interruptions for the start of the second and third Covid-19 waves.\nThe estimates are exponentiated model coefficients, i.e. the intercept estimate is the baseline weekly incidence rate and all other estimates are rate ratios associated with the different variables.\nStandard errors are for coefficient estimates on the link scale not exponentiated coefficients.\nThe difference in effects (on the link scale) of the second and third Covid-19 waves is ",format(nsmall=2,round(digits=2,covidWaves12Effect))," with 95% confidence interval (",paste(collapse=",",format(nsmall=2,round(digits=2,covidWaves12EffectCI))),").\nThe total difference in outcomes between the counterfactual scenario and observed outcomes from ",covidDate," to ",as.Date(predRange[2])," is ",round(digits=0,diffOutcome)," with 95% CI (",round(digits=0,diffOutcomeLow),", ",round(digits=0,diffOutcomeUpp),").")) %>%
kable_styling(full_width=FALSE) %>%
column_spec(5,bold=ifelse(tmp$p.value<0.05 & tmp$rownum>1,TRUE,FALSE))
)
}else{
print(
tmp %>%
dplyr::select(!rownum) %>%
kable(digits = 4,caption=paste(sep="","Summary of segmented regression analysis for ",tolower(varNameAnnot)," with three interruptions for the start of the second and third Covid-19 waves.")) %>%
kable_styling(full_width=FALSE) %>%
column_spec(5,bold=ifelse(tmp$p.value<0.05 & tmp$rownum>1,TRUE,FALSE))
)
}
gCov2<-ggplot() +
geom_rect(aes(xmin=as_datetime(covid_week), xmax=as_datetime(covid2_week), ymin=-Inf, ymax=Inf), alpha=0.1) +
geom_rect(aes(xmin=as_datetime(covid2_week), xmax=max(c(as_datetime(dat$date),ymd_hm(predRange))), ymin=-Inf, ymax=Inf), alpha=0.2) +
geom_line(aes(y=srPred, x=ymd_hms(date)), color="orange", data=predDfCF, lty=2) +
geom_ribbon(aes(ymax=srPredUpp, ymin=srPredLow, x=date), fill="orange", alpha=0.2, data=predDfCF) +
geom_line(aes(y=srPred, x=ymd_hms(date)), color="steelblue", data=predDfCov2) +
geom_ribbon(aes(ymax=srPredUpp, ymin=srPredLow, x=date), fill="steelblue", alpha=0.3, data=predDfCov2) +
geom_point(aes(y=sr, x=as_datetime(date)), data=dat, shape=1) +
ylab(ylabAnnot) +
xlab("Date") +
labs(title=paste(sep="",varNameAnnot," - ITS analysis"),
caption=paste(sep="","\n Dots = observed data.\n Line = fitted model (95% CI) with both step and slope changes due to the second and third COVID-19 waves.\n Shaded areas indicate the second (lighter shade of grey) and third (darker shade) COVID-19 waves in Malawi.\nThe solid blue curve shows the model fit for actual data.\nThe dashed orange line shows the counterfactual scenario of no second & third COVID-19 waves.\nModel AIC = ",round(digits=2,modSRcov2$aic),"; pseudo-R2 = ",round(digits=2,pseudoR2),".")) +
coord_cartesian(xlim=c(ymd_hms(predRange[1]), ymd_hms(predRange[2])),ylim=c(0.75*min(c(dat$sr,predDfCF$srPred,predDfCov2$srPred,predDfCov2$srPredLow)),min(c(max(4*dat$sr),max(c(1.15*dat$sr,predDfCF$srPred,1.15*predDfCov2$srPred,1.15*predDfCov2$srPredUpp)))))) +
scale_x_datetime(expand = c(0,0)) +
theme_bw() +
theme(legend.title = element_blank())
print(gCov2)
res<-list()
res[[paste(sep="",varNameAnnot,"_modSRCov2")]]<-modSRcov2
res[[paste(sep="",varNameAnnot,"_graphCov2")]]<-gCov2
res[["observedOutcome"]]<-obsOutcome
res[["predictedCounterfactualOutcome"]]<-predOutcome
res[["predictedCounterfactualOutcomeLow"]]<-predOutcomeLow
res[["predictedCounterfactualOutcomeUpp"]]<-predOutcomeUpp
res[["differenceCounterFactualObserved"]]<-diffOutcome
res[["differenceCounterFactualObservedLow"]]<-diffOutcomeLow
res[["differenceCounterFactualObservedUpp"]]<-diffOutcomeUpp
res[["data"]]<-dat
res[["predDfCFWeekly"]]<-predDfCFWeekly
# if(doBS){
# res[["predictedCounterfactualOutcome_BS"]]<-predOutcomeBS
# res[["differenceCounterFactualObserved_BS"]]<-diffOutcomeBS
# }
return(res)
}else{
print("Unable to fit models.")
return(NULL)
}
}
set.seed(20220518)
mod<-list()
Nationwide
Births
mod$births<-modFunNoOffset(dat=ms01_weekly,predRange=c(paste(sep="",datePlotStart," 00:00"),paste(sep="",datePlotEnd," 23:59")),var="births",varNameAnnot="Weekly live birth rate",ylabAnnot="Live births (per week)")
Summary of segmented regression analysis for weekly live birth rate with two interruptions for the start of the second and third Covid-19 waves. The estimates are exponentiated model coefficients, i.e. the intercept estimate is the baseline weekly incidence rate and all other estimates are rate ratios associated with the different variables. Standard errors are for coefficient estimates on the link scale not exponentiated coefficients. The difference in effects (on the link scale) of the second and third Covid-19 waves is -0.13 with 95% confidence interval (-0.30, 0.03). The total difference in outcomes between the counterfactual scenario and observed outcomes from 2021-01-01 to 2021-10-31 is -31375 with 95% CI (-86277, 79769).
|
|
Estimate
|
Std. Error
|
z value
|
p.value
|
|
(Intercept)
|
4652.9444
|
0.0635
|
132.9638
|
0.0000
|
|
covid
|
0.9266
|
0.0786
|
-0.9696
|
0.3322
|
|
covid2
|
1.0564
|
0.0765
|
0.7170
|
0.4734
|
|
week_num
|
0.9878
|
0.0062
|
-1.9837
|
0.0473
|
|
week_num_postcovid
|
1.0134
|
0.0072
|
1.8490
|
0.0645
|
|
week_num_postcovid2
|
0.9964
|
0.0061
|
-0.5912
|
0.5544
|

df<-data.frame(
week_num=mod$births$data$week_num,
residuals=residuals(mod$births$`Weekly live birth rate_modSRCov2`,type="deviance"),
fitted_values=predict(newdata=mod$births$data,object=mod$births$`Weekly live birth rate_modSRCov2`)
)
df %>% ggplot(mapping=aes(x=fitted_values,y=residuals)) +
geom_point() +
theme_light() +
xlab("fitted values") +
ylab("deviance residuals")

acf(df$res)

pacf(df$res)

Maternal deaths
mod$mds<-modFunBirthOffset(dat=ms01_weekly,predRange=c(paste(sep="",datePlotStart," 00:00"),paste(sep="",datePlotEnd," 23:59")),var="mds",varNameAnnot="Maternal death rate",ylabAnnot="Maternal deaths (per 1,000 live births)")
Summary of segmented regression analysis for maternal death rate with two interruptions for the start of the second and third Covid-19 waves. The estimates are exponentiated model coefficients, i.e. the intercept estimate is the baseline incidence rate (per 1,000 live births) and all other estimates are rate ratios associated with the different variables. Standard errors are for coefficient estimates on the link scale not exponentiated coefficients. The difference in effects (on the link scale) of the second and third Covid-19 waves is -0.21 with 95% confidence interval (-0.77, 0.34). The total difference in outcomes between the counterfactual scenario and observed outcomes from 2021-01-01 to 2021-10-31 is -81 with 95% CI (-283, 519).
|
|
Estimate
|
Std. Error
|
z value
|
p.value
|
|
(Intercept)
|
2.6878
|
0.1875
|
-31.5698
|
0.0000
|
|
covid
|
1.6686
|
0.2361
|
2.1688
|
0.0301
|
|
covid2
|
2.0506
|
0.2410
|
2.9800
|
0.0029
|
|
week_num
|
0.9954
|
0.0187
|
-0.2440
|
0.8073
|
|
week_num_postcovid
|
0.9646
|
0.0221
|
-1.6301
|
0.1031
|
|
week_num_postcovid2
|
1.0291
|
0.0187
|
1.5394
|
0.1237
|

df<-data.frame(
week_num=mod$mds$data$week_num,
residuals=residuals(mod$mds$`Maternal death rate_modSRCov2`,type="deviance"),
fitted_values=predict(newdata=mod$mds$data,object=mod$mds$`Maternal death rate_modSRCov2`)
)
df %>% ggplot(mapping=aes(x=fitted_values,y=residuals)) +
geom_point() +
theme_light() +
xlab("fitted values") +
ylab("deviance residuals")

acf(df$res)

pacf(df$res)

Neonatal deaths
mod$nds<-modFunBirthOffset(dat=ms01_weekly,predRange=c(paste(sep="",datePlotStart," 00:00"),paste(sep="",datePlotEnd," 23:59")),var="nds",varNameAnnot="Neonatal death rate",ylabAnnot="Neonatal deaths (per 1,000 live births)")
Summary of segmented regression analysis for neonatal death rate with two interruptions for the start of the second and third Covid-19 waves. The estimates are exponentiated model coefficients, i.e. the intercept estimate is the baseline incidence rate (per 1,000 live births) and all other estimates are rate ratios associated with the different variables. Standard errors are for coefficient estimates on the link scale not exponentiated coefficients. The difference in effects (on the link scale) of the second and third Covid-19 waves is 0.09 with 95% confidence interval (-0.12, 0.25). The total difference in outcomes between the counterfactual scenario and observed outcomes from 2021-01-01 to 2021-10-31 is 1987 with 95% CI (-1180, 8715).
|
|
Estimate
|
Std. Error
|
z value
|
p.value
|
|
(Intercept)
|
25.9384
|
0.0713
|
-51.2538
|
0.0000
|
|
covid
|
0.9930
|
0.0880
|
-0.0800
|
0.9362
|
|
covid2
|
0.9066
|
0.0871
|
-1.1261
|
0.2601
|
|
week_num
|
1.0127
|
0.0069
|
1.8172
|
0.0692
|
|
week_num_postcovid
|
0.9832
|
0.0081
|
-2.0952
|
0.0362
|
|
week_num_postcovid2
|
1.0184
|
0.0069
|
2.6504
|
0.0080
|

df<-data.frame(
week_num=mod$nds$data$week_num,
residuals=residuals(mod$nds$`Neonatal death rate_modSRCov2`,type="deviance"),
fitted_values=predict(newdata=mod$nds$data,object=mod$nds$`Neonatal death rate_modSRCov2`)
)
df %>% ggplot(mapping=aes(x=fitted_values,y=residuals)) +
geom_point() +
theme_light() +
xlab("fitted values") +
ylab("deviance residuals")

acf(df$res)

pacf(df$res)

Still births (all)
mod$sbs<-modFunBirthOffset(dat=ms01_weekly,predRange=c(paste(sep="",datePlotStart," 00:00"),paste(sep="",datePlotEnd," 23:59")),var="sbs",varNameAnnot="Stillbirth rate",ylabAnnot="Stillbirths (per 1,000 live births)")
Summary of segmented regression analysis for stillbirth rate with two interruptions for the start of the second and third Covid-19 waves. The estimates are exponentiated model coefficients, i.e. the intercept estimate is the baseline incidence rate (per 1,000 live births) and all other estimates are rate ratios associated with the different variables. Standard errors are for coefficient estimates on the link scale not exponentiated coefficients. The difference in effects (on the link scale) of the second and third Covid-19 waves is -0.23 with 95% confidence interval (-0.39,-0.03). The total difference in outcomes between the counterfactual scenario and observed outcomes from 2021-01-01 to 2021-10-31 is 112 with 95% CI (-1038, 1820).
|
|
Estimate
|
Std. Error
|
z value
|
p.value
|
|
(Intercept)
|
24.4789
|
0.0509
|
-72.8962
|
0.0000
|
|
covid
|
1.0355
|
0.0667
|
0.5226
|
0.6012
|
|
covid2
|
1.2980
|
0.0649
|
4.0200
|
0.0001
|
|
week_num
|
1.0012
|
0.0051
|
0.2406
|
0.8099
|
|
week_num_postcovid
|
0.9910
|
0.0060
|
-1.4954
|
0.1348
|
|
week_num_postcovid2
|
1.0032
|
0.0051
|
0.6160
|
0.5379
|

df<-data.frame(
week_num=mod$sbs$data$week_num,
residuals=residuals(mod$sbs$`Stillbirth rate_modSRCov2`,type="deviance"),
fitted_values=predict(newdata=mod$sbs$data,object=mod$sbs$`Stillbirth rate_modSRCov2`)
)
df %>% ggplot(mapping=aes(x=fitted_values,y=residuals)) +
geom_point() +
theme_light() +
xlab("fitted values") +
ylab("deviance residuals")

acf(df$res)

pacf(df$res)

Fresh still births
mod$fsbs<-modFunBirthOffset(dat=ms01_weekly,predRange=c(paste(sep="",datePlotStart," 00:00"),paste(sep="",datePlotEnd," 23:59")),var="fsbs",varNameAnnot="Fresh stillbirth rate",ylabAnnot="Fresh stillbirths (per 1,000 live births)")
Summary of segmented regression analysis for fresh stillbirth rate with two interruptions for the start of the second and third Covid-19 waves. The estimates are exponentiated model coefficients, i.e. the intercept estimate is the baseline incidence rate (per 1,000 live births) and all other estimates are rate ratios associated with the different variables. Standard errors are for coefficient estimates on the link scale not exponentiated coefficients. The difference in effects (on the link scale) of the second and third Covid-19 waves is -0.37 with 95% confidence interval (-0.58,-0.13). The total difference in outcomes between the counterfactual scenario and observed outcomes from 2021-01-01 to 2021-10-31 is 571 with 95% CI (-459, 2476).
|
|
Estimate
|
Std. Error
|
z value
|
p.value
|
|
(Intercept)
|
12.2025
|
0.0752
|
-58.6001
|
0.0000
|
|
covid
|
0.9401
|
0.0975
|
-0.6341
|
0.5260
|
|
covid2
|
1.3642
|
0.0963
|
3.2264
|
0.0013
|
|
week_num
|
1.0071
|
0.0074
|
0.9476
|
0.3433
|
|
week_num_postcovid
|
0.9839
|
0.0089
|
-1.8276
|
0.0676
|
|
week_num_postcovid2
|
1.0059
|
0.0076
|
0.7805
|
0.4351
|

df<-data.frame(
week_num=mod$fsbs$data$week_num,
residuals=residuals(mod$fsbs$`Fresh stillbirth rate_modSRCov2`,type="deviance"),
fitted_values=predict(newdata=mod$fsbs$data,object=mod$fsbs$`Fresh stillbirth rate_modSRCov2`)
)
df %>% ggplot(mapping=aes(x=fitted_values,y=residuals)) +
geom_point() +
theme_light() +
xlab("fitted values") +
ylab("deviance residuals")

acf(df$res)

pacf(df$res)

Macerated still births
mod$msbs<-modFunBirthOffset(dat=ms01_weekly,predRange=c(paste(sep="",datePlotStart," 00:00"),paste(sep="",datePlotEnd," 23:59")),var="msbs",varNameAnnot="Macerated stillbirth rate",ylabAnnot="Macerated stillbirths (per 1,000 live births)")
Summary of segmented regression analysis for macerated stillbirth rate with two interruptions for the start of the second and third Covid-19 waves. The estimates are exponentiated model coefficients, i.e. the intercept estimate is the baseline incidence rate (per 1,000 live births) and all other estimates are rate ratios associated with the different variables. Standard errors are for coefficient estimates on the link scale not exponentiated coefficients. The difference in effects (on the link scale) of the second and third Covid-19 waves is -0.07 with 95% confidence interval (-0.36, 0.20). The total difference in outcomes between the counterfactual scenario and observed outcomes from 2021-01-01 to 2021-10-31 is -390 with 95% CI (-1025, 805).
|
|
Estimate
|
Std. Error
|
z value
|
p.value
|
|
(Intercept)
|
12.3566
|
0.0768
|
-57.2057
|
0.0000
|
|
covid
|
1.1491
|
0.1006
|
1.3819
|
0.1670
|
|
covid2
|
1.2359
|
0.0962
|
2.2009
|
0.0277
|
|
week_num
|
0.9946
|
0.0078
|
-0.6986
|
0.4848
|
|
week_num_postcovid
|
0.9990
|
0.0091
|
-0.1153
|
0.9082
|
|
week_num_postcovid2
|
1.0003
|
0.0077
|
0.0437
|
0.9651
|

df<-data.frame(
week_num=mod$msbs$data$week_num,
residuals=residuals(mod$msbs$`Macerated stillbirth rate_modSRCov2`,type="deviance"),
fitted_values=predict(newdata=mod$msbs$data,object=mod$msbs$`Macerated stillbirth rate_modSRCov2`)
)
df %>% ggplot(mapping=aes(x=fitted_values,y=residuals)) +
geom_point() +
theme_light() +
xlab("fitted values") +
ylab("deviance residuals")

acf(df$res)

pacf(df$res)

Antenatal clinics
mod$ancs<-modFunNoOffset(dat=ms01_weekly,predRange=c(paste(sep="",datePlotStart," 00:00"),paste(sep="",datePlotEnd," 23:59")),var="ancs",varNameAnnot="Weekly antenatal clinic visit rate",ylabAnnot="Antenatal clinic visits (per week)")
Summary of segmented regression analysis for weekly antenatal clinic visit rate with two interruptions for the start of the second and third Covid-19 waves. The estimates are exponentiated model coefficients, i.e. the intercept estimate is the baseline weekly incidence rate and all other estimates are rate ratios associated with the different variables. Standard errors are for coefficient estimates on the link scale not exponentiated coefficients. The difference in effects (on the link scale) of the second and third Covid-19 waves is 0.07 with 95% confidence interval (-0.09, 0.23). The total difference in outcomes between the counterfactual scenario and observed outcomes from 2021-01-01 to 2021-10-31 is -17435 with 95% CI (-89397, 109892).
|
|
Estimate
|
Std. Error
|
z value
|
p.value
|
|
(Intercept)
|
4477.1083
|
0.0564
|
148.9463
|
0.0000
|
|
covid
|
1.1099
|
0.0697
|
1.4953
|
0.1348
|
|
covid2
|
1.0344
|
0.0679
|
0.4978
|
0.6186
|
|
week_num
|
0.9993
|
0.0055
|
-0.1215
|
0.9033
|
|
week_num_postcovid
|
0.9980
|
0.0064
|
-0.3065
|
0.7592
|
|
week_num_postcovid2
|
1.0024
|
0.0054
|
0.4437
|
0.6572
|

df<-data.frame(
week_num=mod$ancs$data$week_num,
residuals=residuals(mod$ancs$`Weekly antenatal clinic visit rate_modSRCov2`,type="deviance"),
fitted_values=predict(newdata=mod$ancs$data,object=mod$ancs$`Weekly antenatal clinic visit rate_modSRCov2`)
)
df %>% ggplot(mapping=aes(x=fitted_values,y=residuals)) +
geom_point() +
theme_light() +
xlab("fitted values") +
ylab("deviance residuals")

acf(df$res)

pacf(df$res)

Postnatal clinics
ms01_weekly_pnc<-ms01_weekly %>%
mutate(
covid=ifelse(date<ymd("2021-01-01")+lubridate::weeks(6),0,1),
covid2=ifelse(date<ymd("2021-06-20")+lubridate::weeks(6),0,1),
covid3=ifelse(date<ymd("2021-12-10")+lubridate::weeks(6),0,1)
)
covid_week_num_orig<-covid_week_num
covid2_week_num_orig<-covid2_week_num
covid_week_num <- min(ms01_weekly_pnc$week_num[ms01_weekly_pnc$covid==1])
covid2_week_num <- min(ms01_weekly_pnc$week_num[ms01_weekly_pnc$covid2==1])
ms01_weekly_pnc<-ms01_weekly_pnc %>%
mutate(
week_num_postcovid=ifelse(covid==0,0,week_num-covid_week_num+1),
week_num_postcovid2=ifelse(covid2==0,0,week_num-covid2_week_num+1),
week_num_postcovid3=ifelse(covid3==0,0,week_num-covid3_week_num+1)
)
mod$pncs<-modFunNoOffset(dat=ms01_weekly_pnc,predRange=c(paste(sep="",datePlotStart," 00:00"),paste(sep="",datePlotEnd," 23:59")),var="pncs",varNameAnnot="Weekly postnatal clinic visit rate",ylabAnnot="Postnatal clinic visits (per week)")
Summary of segmented regression analysis for weekly postnatal clinic visit rate with two interruptions for the start of the second and third Covid-19 waves. The estimates are exponentiated model coefficients, i.e. the intercept estimate is the baseline weekly incidence rate and all other estimates are rate ratios associated with the different variables. Standard errors are for coefficient estimates on the link scale not exponentiated coefficients. The difference in effects (on the link scale) of the second and third Covid-19 waves is -0.54 with 95% confidence interval (-0.66,-0.43). The total difference in outcomes between the counterfactual scenario and observed outcomes from 2021-01-01 to 2021-10-31 is 36721 with 95% CI (15719, 64754).
|
|
Estimate
|
Std. Error
|
z value
|
p.value
|
|
(Intercept)
|
2283.0513
|
0.0428
|
180.8284
|
0.0000
|
|
covid
|
0.5932
|
0.0583
|
-8.9534
|
0.0000
|
|
covid2
|
1.0130
|
0.0692
|
0.1863
|
0.8522
|
|
week_num
|
1.0017
|
0.0031
|
0.5413
|
0.5883
|
|
week_num_postcovid
|
0.9982
|
0.0043
|
-0.4229
|
0.6724
|
|
week_num_postcovid2
|
1.0179
|
0.0073
|
2.4343
|
0.0149
|

df<-data.frame(
week_num=mod$pncs$data$week_num,
residuals=residuals(mod$pncs$`Weekly postnatal clinic visit rate_modSRCov2`,type="deviance"),
fitted_values=predict(newdata=mod$pncs$data,object=mod$pncs$`Weekly postnatal clinic visit rate_modSRCov2`)
)
df %>% ggplot(mapping=aes(x=fitted_values,y=residuals)) +
geom_point() +
theme_light() +
xlab("fitted values") +
ylab("deviance residuals")

acf(df$res)

pacf(df$res)

covid_week_num<-covid_week_num_orig
covid2_week_num<-covid2_week_num_orig
modAll<-mod
Summarising the above (for some of the outcomes)
sumMods<-function(var){
tabSum<-data.frame(
var=var,
model=c("Malawi (all)"),
Observed=NA,
Counterfactual=NA,
DifferenceAbs=NA,
DifferenceRel=NA
)
if(!is.null(modAll[[var]])){
tabSum[tabSum$model=="Malawi (all)",c("Observed","Counterfactual","DifferenceAbs","DifferenceRel")]<-c(
modAll[[var]]$observedOutcome,
paste(sep="",round(modAll[[var]]$predictedCounterfactualOutcome)," (",round(modAll[[var]]$predictedCounterfactualOutcomeLow),", ",round(modAll[[var]]$predictedCounterfactualOutcomeUpp),")"),
paste(sep="",round(modAll[[var]]$differenceCounterFactualObserved)," (",round(modAll[[var]]$differenceCounterFactualObservedLow),", ",round(modAll[[var]]$differenceCounterFactualObservedUpp),")"),
paste(sep="",format(nsmall=2,round(digits=2,100*modAll[[var]]$differenceCounterFactualObserved/modAll[[var]]$observedOutcome)),"% (",format(nsmall=2,round(digits=2,100*modAll[[var]]$differenceCounterFactualObservedLow/modAll[[var]]$observedOutcome)),"%, ",format(nsmall=2,round(digits=2,100*modAll[[var]]$differenceCounterFactualObservedUpp/modAll[[var]]$observedOutcome)),"%)")
)
}
return(tabSum)
}
tabSum_births<-sumMods("births")
tabSum_mds<-sumMods("mds")
tabSum_nds<-sumMods("nds")
tabSum_fsbs<-sumMods("fsbs")
tabSum_msbs<-sumMods("msbs")
tabSum_ancs<-sumMods("ancs")
tabSum_pncs<-sumMods("pncs")
tabSum<-rbind(
tabSum_births,
tabSum_mds,
tabSum_nds,
tabSum_fsbs,
tabSum_msbs,
tabSum_ancs,
tabSum_pncs
)
tabSum %>%
dplyr::select(!c(var,model)) %>%
knitr::kable(col.names=c("Observed","Predicted (95% CI)","Difference (absolute)","Difference (relative)")) %>%
kableExtra::kable_styling(full_width = FALSE) %>%
pack_rows(index = table(fct_inorder(tabSum[,"var"])))
|
Observed
|
Predicted (95% CI)
|
Difference (absolute)
|
Difference (relative)
|
|
births
|
|
158844
|
127469 (72567, 238613)
|
-31375 (-86277, 79769)
|
-19.75% (-54.32%, 50.22%)
|
|
mds
|
|
431
|
350 (148, 950)
|
-81 (-283, 519)
|
-18.80% (-65.61%, 120.51%)
|
|
nds
|
|
4799
|
6786 (3619, 13514)
|
1987 (-1180, 8715)
|
41.39% (-24.59%, 181.60%)
|
|
fsbs
|
|
1992
|
2563 (1533, 4468)
|
571 (-459, 2476)
|
28.64% (-23.05%, 124.31%)
|
|
msbs
|
|
1966
|
1576 (941, 2771)
|
-390 (-1025, 805)
|
-19.83% (-52.15%, 40.95%)
|
|
ancs
|
|
209292
|
191857 (119895, 319184)
|
-17435 (-89397, 109892)
|
-8.33% (-42.71%, 52.51%)
|
|
pncs
|
|
56502
|
93223 (72221, 121256)
|
36721 (15719, 64754)
|
64.99% (27.82%, 114.60%)
|
write.csv(tabSum,row.names=FALSE,file=paste(sep="",outDir,"summaryTable_observedVsCounterfactual_",dateStr,".csv"))
save(list=c("modAll"),file=paste(sep="",outDir,"allModels_",dateStr,".RData"))